---
title: "07 Mixed Effects Model - Children"
author: "Ye Shen, Radhika Tampi, Raj Vatsa"
date: "11/29/2021"
output: pdf_document
editor_options: 
  chunk_output_type: console
---

```{r, results = 'hide', message = FALSE}
knitr::opts_chunk$set("warning"=FALSE)
library(tidyverse)
library(survey)
library(WeightIt)
library(survey)
library(questionr) # use survey design in ggplot
library(patchwork)
library(cobalt)
library(gt)
library(lme4)
library(arm)
library(lmerTest)
library(optimx)
library(gridExtra)
```

## Read and Subset Cleaned Data

```{r}
children_alldisease_did<-readRDS("data/cleaned_children_did_alldis.rds")

# Survey design of data set to account for sample weights
svy_children_alldisease_did <- svydesign(
  id = ~cluster, strata = ~group, weights = ~wmweight,
  data = children_alldisease_did
)

# Subsets of data from 2014 and 2016 surveys
svy_children_alldisease_2014<-subset(svy_children_alldisease_did, year==0)
svy_children_alldisease_2016<-subset(svy_children_alldisease_did, year==1)
```


### DiD regression with village-level random effects, adjusting for wealth index score
  * Outcome: Access to any public_level care (Visit to a PHF or CHW)
  
```{r}
# Weighted linear mixed effects model on care seeking among children
# Adjusted for household wealth index score
did_children_mixed <- lmer(ALL.public_level ~ year * group + menage_score_bien_etre + 
                             (0 + year | cluster) + (0 + group | cluster) + 
                             (1 + year:group | cluster),
                           data = children_alldisease_did,
                           weights = wmweight,
                           control = lmerControl(optimizer = "bobyqa"))
summary(did_children_mixed)

# Unadjusted and unweighted DID regression on care seeking among children
did_children_unwtd_unadj <- svyglm(ALL.public_level ~ year*group, 
                                   design = svy_children_alldisease_did, 
                                   family = gaussian)
summary(did_children_unwtd_unadj)

# Adjusted and unweighted DID regression on care seeking among children
did_children_unwtd_adj <- svyglm(ALL.public_level ~ year*group + menage_score_bien_etre, 
                                   design = svy_children_alldisease_did, 
                                   family = gaussian)
summary(did_children_unwtd_adj)
```

## Table 2 for children with any illnesses
 * Outcome: Access to any public_level care (Visit to a PHF or CHW)

### Prepare Table 2 for children with any illnesses 

```{r}
table2_children <- 
  as.data.frame(array(data = NA, dim = c(2, 22)))
# "Indicator","Study group", "Denominator", "Numerator", "Percent (SE)", "Difference 
# (p-value)","Denominator", "Numerator", "Percent (SE)", "Difference (p-value)","Percent 
# relative change", "Percent absolute change", "Difference in differences (p-value)"
colnames(table2_children) <- 
  c("indicator", "group", "d_2014", "n_2014", "m_2014", "se_2014", "dif_2014", "p-val_2014", 
    "d_2016", "n_2016", "m_2016", "se_2016", "dif_2016", "p-val_2016", "trends_rel", 
    "trends_abs", "dif.in.dif(coef)_unadj_unwt", "p-val_did_unadj_unwt",
    "dif.in.dif(coef)_adj_unwt", "p-val_did_adj_unwt",
    "dif.in.dif(coef)_mixed", "p-val_did_mixed")
table2_children$indicator <- "ALL.public_level"

# Any visit (Access)
## Summary stats for 2014
table2_children[rev(c(1,2)), 2] <- c(0,1)
table2_children[rev(c(1,2)), 3] <- 
    round(svytable( ~group, design = svy_children_alldisease_2014)) # 'd_2014'
table2_children[rev(c(1,2)), 4] <- 
    round(coef(svyby(~ALL.public_level, by = ~group, design = svy_children_alldisease_2014, 
                     FUN = svytotal, keep.var = T, na.rm = T))) # 'n_2014'
table2_children[rev(c(1,2)), 5] <- 
    round(coef(svyby(~ALL.public_level, by = ~group, design = svy_children_alldisease_2014, 
                     FUN = svymean, keep.var = T, na.rm = T)) * 100, 2) # 'm_2014'
table2_children[rev(c(1,2)), 6] <- 
    round(SE(svyby(~ALL.public_level, by = ~group, design = svy_children_alldisease_2014, 
                   FUN = svymean,keep.var = T, na.rm = T)) * 100, 2) # 'se_2014'
table2_children[rev(c(1,2)), 7] <- table2_children[1, 5] - table2_children[2, 5]# 'dif_2014'

### Estimate p-value of the 2014 difference with a Chi2 test
  if (table2_children[1, 4] == 0 & 
      table2_children[2, 4] == 0) {
    table2_children[2, 8] <- NA
  } else {
    table2_children[2, 8] <- 
      round(svychisq(~ALL.public_level+group, 
                     design = svy_children_alldisease_2014)$p.value, 2)
  }

## Summary stats for 2016
table2_children[rev(c(1,2)), 9] <- 
    round(svytable( ~group, design = svy_children_alldisease_2016)) # 'd_2016'
table2_children[rev(c(1,2)), 10] <- 
    round(coef(svyby(~ALL.public_level, by = ~group, design = svy_children_alldisease_2016, 
                     FUN = svytotal,keep.var = T, na.rm = T))) # 'n_2016'
table2_children[rev(c(1,2)), 11] <- 
    round(coef(svyby(~ALL.public_level, by = ~group, design = svy_children_alldisease_2016, 
                     FUN = svymean, keep.var = T, na.rm = T))*100,2) # 'm_2016'
table2_children[rev(c(1,2)), 12] <- 
    round(SE(svyby(~ALL.public_level, by = ~group, design = svy_children_alldisease_2016, 
                   FUN = svymean,keep.var = T, na.rm = T))*100,2) # 'se_2016'
table2_children[rev(c(1,2)), 13] <- 
  table2_children[1, 11] - table2_children[2, 11]# 'dif_2016'

### Estimate p-value of the 2016 difference with a Chi2 test
  if (table2_children[1, 10] == 0 & 
      table2_children[2, 10] == 0) {
    table2_children[2, 14] <- NA
  } else {
    table2_children[2, 14] <- 
      round(svychisq(~ALL.public_level+group, design = svy_children_alldisease_2016)$p.value,2)
  }

## Estimate values for trends 2014-2016
  table2_children[rev(c(1, 2)), 15] <- 
    round((table2_children[rev(c(1, 2)), 11] - 
       table2_children[rev(c(1, 2)), 5]) / 
    table2_children[rev(c(1, 2)), 5]*100,2) # 'trends_rel'
  table2_children[rev(c(1, 2)), 16] <- 
    table2_children[rev(c(1, 2)), 11] - 
    table2_children[rev(c(1, 2)), 5] # 'trends_abs'

## DiD stats 
  # Unadjusted DiD
    #coef
  table2_children[2, 17] <- 
    round(summary(did_children_unwtd_unadj)$coefficients[4, 1]*100,2)
    #P
  table2_children[2, 18] <- 
    round(summary(did_children_unwtd_unadj)$coefficients[4, 4],2)
  # Adjusted DiD
    #coef
  table2_children[2, 19] <- 
    round(summary(did_children_unwtd_adj)$coefficients[5, 1]*100,2)
    #P
  table2_children[2, 20] <- 
    round(summary(did_children_unwtd_adj)$coefficients[5, 4],2)
  # Mixed effects DiD: adjusted and weighted
    #coef
  table2_children[2, 21] <- round(summary(did_children_mixed)$coef[5,1] * 100, 2)
    #P
  table2_children[2, 22] <- round(summary(did_children_mixed)$coef[5,5], 2)
```

### Final Table 2 Children
```{r}
# Replicate final Table 2
final_table2_children <- data.frame(indicator=table2_children$indicator,
                                    group=table2_children$group,
                                    trends_rel= table2_children$trends_rel,
                                    trends_abs= table2_children$trends_abs, 
                                    d_2014=table2_children$d_2014, 
                                    d_2016=table2_children$d_2016)
final_table2_children$m_2014 <- sprintf("%.2f (%.2f)", table2_children$m_2014, 
                                       table2_children$se_2014)
final_table2_children$m_2016 <- sprintf("%.2f (%.2f)", table2_children$m_2016, 
                                       table2_children$se_2016)
final_table2_children$dif_2014 <- ifelse(is.na(table2_children$`p-val_2014`),"--" ,sprintf("%.2f (%.2f)", table2_children$dif_2014,table2_children$`p-val_2014`))
final_table2_children$dif_2016 <- ifelse(is.na(table2_children$`p-val_2016`),"--" ,sprintf("%.2f (%.2f)", table2_children$dif_2016,table2_children$`p-val_2016`))

final_table2_children$`dif.in.dif(unadj_unwt)` <- ifelse(is.na(table2_children$`p-val_did_unadj_unwt`),"--" , 
                                                        sprintf("%.2f (%.2f)", 
                                                                  table2_children$`dif.in.dif(coef)_unadj_unwt`,
                                                                table2_children$`p-val_did_unadj_unwt`))

final_table2_children$`dif.in.dif(adj_unwt)` <- ifelse(is.na(table2_children$`p-val_did_adj_unwt`),"--" , 
                                                        sprintf("%.2f (%.2f)", 
                                                                  table2_children$`dif.in.dif(coef)_adj_unwt`,
                                                                table2_children$`p-val_did_adj_unwt`))

final_table2_children$`dif.in.dif(mixed)` <- ifelse(is.na(table2_children$`p-val_did_mixed`),"--" , 
                                                        sprintf("%.2f (%.2f)", 
                                                                table2_children$`dif.in.dif(coef)_mixed`, 
                                                                table2_children$`p-val_did_mixed`))

final_table2_children <- as_tibble(final_table2_children) %>% 
  dplyr::select(-c(indicator)) %>%
  mutate(
    group = case_when(group == 1 ~ "IG", 
                      group == 0 ~ "NG")
   ) %>%
  rename("orig_DiD" = "dif.in.dif(unadj_unwt)",
         "orig_DiD_adj" = "dif.in.dif(adj_unwt)",
         "mixed_effects_DiD" = "dif.in.dif(mixed)")
  
col_order <- c("group", "d_2014", "d_2016", "m_2014", "m_2016", "dif_2014", "dif_2016",
               "trends_rel", "trends_abs", "orig_DiD", "orig_DiD_adj", "mixed_effects_DiD")
final_table2_children <- final_table2_children[, col_order]
saveRDS(final_table2_children, 
        "table_fig/child/final_table2children_mixed.rds")  

gt(final_table2_children ) %>%
  tab_header(title = "Table S7: Summary of responses and trends re: visits to a PHF or CHW for all children with symptoms of diarrhea, fever, and/or coughing with difficulty breathing from the 2014 and 2016 IHOPE surveys") %>%
  tab_style(style = cell_text(align = "left"), location = cells_title("title")) %>%
  tab_spanner(
    label = "2014",
    columns = c(d_2014, m_2014, dif_2014)
  ) %>%
  tab_spanner(
    label = "2016",
    columns = c(d_2016, m_2016, dif_2016)
  ) %>%
  tab_spanner(
    label = "2014-2016 Trend",
    columns = c(trends_rel, trends_abs, orig_DiD, orig_DiD_adj, mixed_effects_DiD)
  ) %>%
  tab_style(style = cell_text(weight = "bold"), 
            location = cells_column_spanners(spanners = everything())) %>%
    cols_label(group = "Study group", d_2014 = "N", m_2014 = "Percent (SE)", 
               dif_2014 = " Difference (p-value)", d_2016 = "N", 
               m_2016 = "Percent (SE)", dif_2016 = " Difference (p-value)", 
               trends_rel = "Percent relative change", 
               trends_abs = "Percent absolute change", orig_DiD="Original Unadjusted DiD (p value) ",
               orig_DiD_adj = "Original Adjusted DiD (p values)",
               mixed_effects_DiD="Mixed Effects DiD (p value)") %>%
    tab_style(style = cell_text(weight = "bold"), 
              location = cells_column_labels(columns = everything())) %>%
    gt::gtsave("table_fig/paper/Supplemental/table_s7_children_mixed_effect.png")
```

## Figure 2 Children
```{r}

num_var <- 1
fig_children_mixed <- as.data.frame(matrix(nrow = num_var * 4, ncol = 6))

###  UPDATE TABLE NAME AS NEEDED ###
fig_children_mixed$V1 <- round(c(
  table2_children$m_2014, table2_children$m_2016), 2)
fig_children_mixed$V2 <- round(c(
  table2_children$n_2014, table2_children$n_2016), 0)
fig_children_mixed$V3 <- round(c(
  table2_children$d_2014, table2_children$d_2016), 0)
fig_children_mixed$V4 <- rep(c("Intervention", "Non-Intervention"), num_var * 2)
fig_children_mixed$V5 <- rep(c("2014 survey", "2016 survey"), each = num_var * 2)
fig_children_mixed$V6 <- 
  rep("Any Sick-Child Visits to a PHF or CHW", num_var * 4) ### RENAME BASED ON OUTCOME ###
colnames(fig_children_mixed) <- c("mean", "num", "dem", "group", "year", "indicator")
fig_children_mixed$group <- factor(fig_children_mixed$group, levels = c("Non-Intervention", "Intervention"))
fig_children_mixed <- 
  ggplot(fig_children_mixed, 
         aes(x = mean, y = group, group = interaction(group, indicator), 
             label = sprintf("%s/%s", num, dem))) +
  geom_point(aes(colour = year), size = 3.5) +
  scale_colour_manual(breaks = fig_children_mixed$year, 
                      values = c("2014 survey" = "black", "2016 survey" = "red")) +
  geom_line() +
  labs(
    x = "Percentage", y = "",
    title = "Mixed Effects DiD with Village-Level Random Effects:\n Any Sick-Child Visits to a PHF or CHW" ### UPDATE TITLE BASED ON OUTCOME ###
  ) +
  theme_bw() +
  scale_x_continuous(limits = c(0, 100), breaks = seq(0, 100, 10), expand = c(0, 0)) +
  scale_y_discrete(position = "right") +
  theme(
    legend.position = "bottom", axis.text = element_text(size = 10), 
    panel.grid.minor = element_blank(), legend.title = element_blank(),
    title = element_text(size = 12, face = "bold"), 
    axis.title = element_text(size = 13),
    legend.text = element_text(size = 12)
  )+
  annotate("text", label="}", x=70, y=1.5, size =20, fontface="plain")+
  annotate("text", label=paste0("Adjusted DiD (p-val): \n",final_table2_children$mixed_effects_DiD[2]), x=85, y=1.5)
saveRDS(fig_children_mixed,"table_fig/child/fig_child_mixed.rds")
ggsave("table_fig/child/fig2children_mixed.png")
```

## Children Variation Figure

```{r}
# Empirical Bayes adjusted DiD estimates for each village
coefs_children <- coef(did_children_mixed)$cluster
coefs_children$village <- rownames(coefs_children)

# Identify villages with lowest and highest DiD estimates
village_min_child <- which.min(coefs_children$`year:group`)
village_max_child <- which.max(coefs_children$`year:group`)

# Plot for village with lowest DiD estimate
village_min_child_table <- data.frame(village = rep(village_min_child, 4),
                                time = c(2014, 2014, 2016, 2016),
                                intervention = c("Non-Intervention", 
                                                 "Intervention",
                                                 "Non-Intervention", 
                                                 "Intervention"),
                                outcome = c(coefs_children$`(Intercept)`[village_min_child],
                                            coefs_children$`(Intercept)`[village_min_child] + 
                                              coefs_children$group[village_min_child],
                                            coefs_children$`(Intercept)`[village_min_child] + 
                                              coefs_children$year[village_min_child],
                                            coefs_children$`(Intercept)`[village_min_child] + 
                                              coefs_children$year[village_min_child] +
                                              coefs_children$group[village_min_child] + 
                                              coefs_children$`year:group`[village_min_child]))
village_min_child_plot <- 
  ggplot(village_min_child_table, mapping = aes(x = time, y = outcome, color = factor(intervention))) + 
  geom_point() + geom_line() + 
  geom_segment(aes(x = 2014, 
                   y = coefs_children$`(Intercept)`[village_min_child] + 
                     coefs_children$group[village_min_child], 
                   xend = 2016, 
                   yend = coefs_children$`(Intercept)`[village_min_child] + 
                     coefs_children$year[village_min_child] +
                     coefs_children$group[village_min_child]), 
               color = "black", lty = 2) + 
  geom_segment(aes(x = 2016, 
                   y = coefs_children$`(Intercept)`[village_min_child] + 
                     coefs_children$year[village_min_child] +
                     coefs_children$group[village_min_child], 
                   xend = 2016, 
                   yend = coefs_children$`(Intercept)`[village_min_child] + 
                     coefs_children$year[village_min_child] +
                     coefs_children$group[village_min_child] +
                     coefs_children$`year:group`[village_min_child]), 
               color = "firebrick", lty = 3) + 
  scale_color_manual(values = c("Intervention" = "cornflowerblue", 
                                   "Non-Intervention" = "forestgreen")) + 
  scale_x_continuous(breaks = c(2014, 2016)) + 
  labs(x = "Year", y = "Proportion",
       title = "Children with Any Sick-Child Visit \nto a PHF or CHW",
       subtitle = paste0("Village #", village_min_child),
       color = "Study Group") + 
  theme_bw() + theme(legend.position = "bottom", panel.grid = element_blank(),
                     plot.subtitle = element_text(face = "bold")) + 
  annotate("text", label = "DiD\nEstimate",
           x = 2015.875, y = 0.21, size = 3, color = "firebrick")
saveRDS(village_min_child_plot, 
        "table_fig/child/village_min_child_plot.rds") 

# Plot for village with highest DiD estimate
village_max_child_table <- data.frame(village = rep(village_max_child, 4),
                                time = c(2014, 2014, 2016, 2016),
                                intervention = c("Non-Intervention", 
                                                 "Intervention",
                                                 "Non-Intervention", 
                                                 "Intervention"),
                                outcome = c(coefs_children$`(Intercept)`[village_max_child],
                                            coefs_children$`(Intercept)`[village_max_child] + 
                                              coefs_children$group[village_max_child],
                                            coefs_children$`(Intercept)`[village_max_child] + 
                                              coefs_children$year[village_max_child],
                                            coefs_children$`(Intercept)`[village_max_child] + 
                                              coefs_children$year[village_max_child] +
                                              coefs_children$group[village_max_child] + 
                                              coefs_children$`year:group`[village_max_child]))
village_max_child_plot <- 
  ggplot(village_max_child_table, mapping = aes(x = time, y = outcome, color = factor(intervention))) + 
  geom_point() + geom_line() + 
  geom_segment(aes(x = 2014, 
                   y = coefs_children$`(Intercept)`[village_max_child] + 
                     coefs_children$group[village_max_child], 
                   xend = 2016, 
                   yend = coefs_children$`(Intercept)`[village_max_child] + 
                     coefs_children$year[village_max_child] +
                     coefs_children$group[village_max_child]), 
               color = "black", lty = 2) + 
  geom_segment(aes(x = 2016, 
                   y = coefs_children$`(Intercept)`[village_max_child] + 
                     coefs_children$year[village_max_child] +
                     coefs_children$group[village_max_child], 
                   xend = 2016, 
                   yend = coefs_children$`(Intercept)`[village_max_child] + 
                     coefs_children$year[village_max_child] +
                     coefs_children$group[village_max_child] +
                     coefs_children$`year:group`[village_max_child]), 
               color = "firebrick", lty = 3) + 
  scale_color_manual(values = c("Intervention" = "cornflowerblue", 
                                   "Non-Intervention" = "forestgreen")) + 
  scale_x_continuous(breaks = c(2014, 2016)) + 
  labs(x = "Year", y = "Proportion",
       title = "Children with Any Sick-Child Visit to a PHF or CHW",
       subtitle = paste0("Village #", village_max_child),
       color = "Study Group") + 
  theme_bw() + theme(legend.position = "bottom", panel.grid = element_blank(),
                     plot.subtitle = element_text(face = "bold")) + 
  annotate("text", label = "DiD\nEstimate",
           x = 2015.875, y = 0.74, size = 3, color = "firebrick")
saveRDS(village_max_child_plot, 
        "table_fig/child/village_max_child_plot.rds") 

# Plot for village with mean DiD estimate
village_median_child_table <- data.frame(village = rep("Mean", 4),
                                time = c(2014, 2014, 2016, 2016),
                                intervention = c("Non-Intervention", 
                                                 "Intervention",
                                                 "Non-Intervention", 
                                                 "Intervention"),
                                outcome = c(fixef(did_children_mixed)[1],
                                            fixef(did_children_mixed)[1] + 
                                              fixef(did_children_mixed)[3],
                                            fixef(did_children_mixed)[1] + 
                                              fixef(did_children_mixed)[2],
                                            fixef(did_children_mixed)[1] + 
                                              fixef(did_children_mixed)[2] +
                                              fixef(did_children_mixed)[3] + 
                                              fixef(did_children_mixed)[5]))
village_median_child_plot <-
  ggplot(village_median_child_table, mapping = aes(x = time, y = outcome, color = factor(intervention))) + 
  geom_point() + geom_line() + 
  geom_segment(aes(x = 2014, 
                   y = fixef(did_children_mixed)[1] + 
                     fixef(did_children_mixed)[3], 
                   xend = 2016, 
                   yend = fixef(did_children_mixed)[1] + 
                     fixef(did_children_mixed)[2] +
                     fixef(did_children_mixed)[3]), 
               color = "black", lty = 2) + 
  geom_segment(aes(x = 2016, 
                   y = fixef(did_children_mixed)[1] + 
                     fixef(did_children_mixed)[2] +
                     fixef(did_children_mixed)[3], 
                   xend = 2016, 
                   yend = fixef(did_children_mixed)[1] + 
                     fixef(did_children_mixed)[2] +
                     fixef(did_children_mixed)[3] +
                     fixef(did_children_mixed)[5]), 
               color = "firebrick", lty = 3) + 
  scale_color_manual(values = c("Intervention" = "cornflowerblue", 
                                   "Non-Intervention" = "forestgreen")) + 
  scale_x_continuous(breaks = c(2014, 2016)) + 
  labs(x = "Year", y = "Proportion",
       title = "Children with Any Sick-Child Visit to a PHF or CHW",
       subtitle = "Median Village",
       color = "Study Group") + 
  theme_bw() + theme(legend.position = "bottom", panel.grid = element_blank(),
                     plot.subtitle = element_text(face = "bold")) + 
  annotate("text", label = "DiD\nEstimate",
           x = 2015.875, y = 0.5, size = 3, color = "firebrick")
saveRDS(village_median_child_plot, 
        "table_fig/child/village_median_child_plot.rds") 

# Save panel of plots

ggsave("table_fig/child/children_mixed_variation.png", 
       arrangeGrob(village_min_child_plot, village_median_child_plot, village_max_child_plot, ncol = 3),
       width = 15, height = 7.5, units = "in")

```

